# politicians' theories of voting - replication code 
library(rio)
library(marginaleffects)
library(modelsummary)
library(nnet)
library(stargazer)
library(tidyverse)
library(psych)
library(poLCA)
library(foreach)
library(doParallel)
library(patchwork)
library(lme4)
library(broom)
# set your working directory to the folder containing the data files
setwd("")
questions <- rio::import("question labels.xlsx") %>% arrange(order)
c.labels <- rio::import("country labels.xlsx")

###############################################################################
# Figure 1: Theory Questions: Distribution of Politician and Citizen Responses

# data
all.resp <- readRDS("data_allresp.RDS")

# pooled plot
b.pooled <- all.resp %>% group_by(question, type, value) %>% 
  summarise(n = n()) %>% mutate(percent = n / sum(n))
b.pooled$short_label_3 <- factor(b.pooled$question, labels=c("Unfair (0) vs.\nFair (10)", "Policy (0) vs.\nIdentity (10)", "Future (0) vs.\nPast (10)", "Sociotropic (0) vs.\nEgocentric (10)", "Single-Issue (0) vs.\nMany-Issue (10)", "Ideas (0) vs.\nLeaders (10)", "Knowledge (0) vs.\nIgnorance (10)", "Short-term (0) vs.\nLong-term (10)"))
p1 <- ggplot(b.pooled, aes(x=value, y=percent, fill=type)) + 
  geom_bar(stat="identity", alpha=0.8) + facet_grid(type~short_label_3) + 
  xlab("") + ylab("") + scale_fill_manual(values=c("#1b9e77", "#7570b3")) + 
  scale_x_continuous(breaks=c(0,5,10)) + 
  scale_y_continuous(limits=c(0,0.35), labels=scales::percent, expand=c(0,0)) + 
  theme_minimal() + theme(legend.title = element_blank(),
                          legend.position = "none",
                          strip.text = element_text(hjust=0, vjust=0, size=10),
                          panel.border = element_rect(fill=NA, linewidth=0.5)) + 
  ggtitle("A. Pooled Data")

# country data 
all.resp$short_label_3 <- factor(all.resp$question, labels=c("Unfair (0) vs.\nFair (10)", "Policy (0) vs.\nIdentity (10)", "Future (0) vs.\nPast (10)", "Sociotropic (0) vs.\nEgocentric (10)", "Single-Issue (0) vs.\nMany-Issue (10)", "Ideas (0) vs.\nLeaders (10)", "Knowledge (0) vs.\nIgnorance (10)", "Short-term (0) vs.\nLong-term (10)"))
all.resp$clabel[all.resp$clabel=="Belgium (Flanders)"] <- "Belgium"
p2 <- ggplot(all.resp, aes(x=value, group=type, fill=type)) + 
  geom_density(alpha=0.5, adjust=1.5) + facet_grid(clabel~short_label_3) + 
  xlab("") + ylab("") + scale_fill_manual(values=c("#1b9e77", "#7570b3")) + 
  scale_x_continuous(breaks=c(0,5,10)) + 
  theme_minimal() + theme(panel.border = element_rect(fill=NA, linewidth=0.5),
                          legend.position = "bottom", legend.title = element_blank(),
                          legend.text = element_text(size=18),
                          strip.text = element_text(hjust=0, vjust=0, size=10)) + 
  ggtitle("B. Distributions by Country")

# combine and export
p1 / p2 + plot_layout(heights=c(1,5))
ggsave("maintext_figure1.pdf", width=12, height=15)

###############################################################################
# Figure 2: Differences between Politicians and Citizens.

# set up data 
mdat <- all.resp %>% filter(Country!=14) %>% filter(Country!=8)

# pooled model, country FEs
pmod1 <- mdat %>% group_by(short_label) %>% 
  do(model = tidy(lm(value ~ type + factor(Country), data=.), conf.int=T)) %>%
  unnest(model) %>% filter(term=="typePoliticians") %>% 
  mutate(sig = ifelse(p.value>0.05, 1, ifelse(estimate<0,2,3)))

# plot differences
p1 <- ggplot(pmod1, aes(x=reorder(short_label, estimate), y=estimate, ymin=conf.low, ymax=conf.high, color=factor(sig))) + 
  geom_hline(yintercept=0, linetype="dotted") + 
  geom_pointrange() + coord_flip() + xlab("") + ylab("") + 
  scale_x_discrete(labels = scales::label_wrap(50)) +
  scale_y_continuous(limits=c(-1.75, 1.75)) + 
  theme_minimal() + theme(panel.border = element_rect(fill=NA, linewidth=0.5),
                          axis.text.y = element_text(size=10),
                          legend.position = "none", legend.title = element_blank()) + 
  scale_color_manual(values = c("gray","black","black"))

# models by country
cmod1 <- mdat %>% group_by(short_label, clabel) %>%
  do(model = tidy(lm(value ~ type, data=.), conf.int=T)) %>% 
  unnest(model) %>% filter(term=="typePoliticians") %>% 
  mutate(sig = ifelse(p.value>0.05, 1, ifelse(estimate<0,2,3)))
order <- pmod1 %>% arrange(-estimate) %>% pull(short_label)
cmod1$short_label <- factor(cmod1$short_label, levels=order)
cmod1$clabel[cmod1$clabel=="Belgium (Flanders)"] <- "Belgium"
cmod1$short_label_3 <- factor(cmod1$short_label, labels=c("Ideas v.\nLeaders", "Sociotropic v.\nEgocentric", "Policy v.\nIdentity", "Knowledge v.\nIgnorance", "Future v.\nPast", "Unfair v.\nFair", "Short-Term v.\nLong-Term", "Single-issue v.\nMany-issue"))
p2 <- ggplot(cmod1, aes(x=reorder(clabel, desc(clabel)), y=estimate, ymin=conf.low, ymax=conf.high, color=factor(sig))) + 
  geom_hline(yintercept=0, linetype="dotted") + 
  geom_pointrange() + coord_flip() + xlab("") + ylab("") + 
  scale_y_continuous(limits=c(-3,3)) + 
  scale_x_discrete(labels = scales::label_wrap(50)) +
  theme_minimal() + theme(panel.border = element_rect(fill=NA, linewidth=0.5),
                          axis.text.y = element_text(size=10),
                          strip.text = element_text(size=10, hjust=0),
                          legend.position = "none", legend.title = element_blank()) + 
  facet_wrap(~short_label_3, nrow=2) + 
  scale_color_manual(values = c("gray","black","black"))

# combine and export 
p1 / p2 + plot_layout(heights=c(1,4))
ggsave("maintext_figure2.pdf", width=8.5, height=8)

# pooled model (for table) 
pmod1.appendix <- mdat %>% group_by(short_label) %>% 
  do(model = lm(value ~ type + factor(clabel), data=.)) %>%
  pull(model)
theories <- unique(mdat$short_label)
stargazer(pmod1.appendix, type="text",
          omit.stat = c("rsq","ser","f"),
          covariate.labels = c("Politician", "Belgium", "Canada", "Czechia", "Denmark", "Germany", "Israel", "Netherlands", "Portugal", "Sweden", "Switzerland"),
          column.labels = theories,
          dep.var.caption = c(""),
          title = "Citizen-Politician Comparison: Pooled Data",
          label = "tab:appCompare1",
          column.sep.width = "1pt",
          header=F,
          dep.var.labels.include = F)

# models by country (for tables)
cmod.tabs <- mdat %>% group_by(short_label, clabel) %>%
  do(model = lm(value ~ type, data=.))
countries <- unique(all.resp$clabel)
vars <- unique(all.resp$short_label)
stargazer(cmod.tabs$model[1:11], out="sm_table_SM7.tex", column.labels = countries, omit.stat = c("rsq","ser","f"), covariate.labels = c("Politicians"), dep.var.caption = c(vars[1]), title = "Citizen-Politician Comparison by Country", label = "tab:appCompare1", column.sep.width = "1pt", header=F, dep.var.labels.include = F)
stargazer(cmod.tabs$model[12:22], out="sm_table_SM8.tex", column.labels = countries, omit.stat = c("rsq","ser","f"), covariate.labels = c("Politicians"), dep.var.caption = c(vars[2]), title = "Citizen-Politician Comparison by Country", label = "tab:appCompare2", column.sep.width = "1pt", header=F, dep.var.labels.include = F)
stargazer(cmod.tabs$model[23:33], out="sm_table_SM9.tex", column.labels = countries, omit.stat = c("rsq","ser","f"), covariate.labels = c("Politicians"), dep.var.caption = c(vars[3]), title = "Citizen-Politician Comparison by Country", label = "tab:appCompare3", column.sep.width = "1pt", header=F, dep.var.labels.include = F)
stargazer(cmod.tabs$model[34:44], out="sm_table_SM10.tex", column.labels = countries, omit.stat = c("rsq","ser","f"), covariate.labels = c("Politicians"), dep.var.caption = c(vars[4]), title = "Citizen-Politician Comparison by Country", label = "tab:appCompare4", column.sep.width = "1pt", header=F, dep.var.labels.include = F)
stargazer(cmod.tabs$model[45:55], out="sm_table_SM11.tex", column.labels = countries, omit.stat = c("rsq","ser","f"), covariate.labels = c("Politicians"), dep.var.caption = c(vars[5]), title = "Citizen-Politician Comparison by Country", label = "tab:appCompare5", column.sep.width = "1pt", header=F, dep.var.labels.include = F)
stargazer(cmod.tabs$model[56:66], out="sm_table_SM12.tex", column.labels = countries, omit.stat = c("rsq","ser","f"), covariate.labels = c("Politicians"), dep.var.caption = c(vars[6]), title = "Citizen-Politician Comparison by Country", label = "tab:appCompare6", column.sep.width = "1pt", header=F, dep.var.labels.include = F)
stargazer(cmod.tabs$model[67:77], out="sm_table_SM13.tex", column.labels = countries, omit.stat = c("rsq","ser","f"), covariate.labels = c("Politicians"), dep.var.caption = c(vars[7]), title = "Citizen-Politician Comparison by Country", label = "tab:appCompare7", column.sep.width = "1pt", header=F, dep.var.labels.include = F)
stargazer(cmod.tabs$model[78:88], out="sm_table_SM14.tex", column.labels = countries, omit.stat = c("rsq","ser","f"), covariate.labels = c("Politicians"), dep.var.caption = c(vars[8]), title = "Citizen-Politician Comparison by Country", label = "tab:appCompare8", column.sep.width = "1pt", header=F, dep.var.labels.include = F)

###############################################################################
# Figure 3: Politician and Citizen Membership in Four Latent Theory Types

# prepare data for LCA
cdat <- readRDS("data_LCA.RDS")
lca.prepped <- cdat %>% dplyr::select(Q70, Q71, Q72, Q73, Q75, Q76, Q77, Q78) %>%
  mutate(across(everything(),function(x)c(car::recode(x, "0:4=1; 5=2; 6:10=3"))))

# prepare cores for parallel processing
totalCores = detectCores()
cluster <- makeCluster(totalCores[1]-1) 
registerDoParallel(cluster)

# run LCA for 2-class through 20-class solutions
# this code takes several minutes to run on a new MBP (when parallelized)
# alternative option: skip this step and use saved results in 
set.seed(123)
lca.allcat <- foreach(i = 2:20) %dopar% {
  poLCA::poLCA(cbind(Q70, Q71, Q72, Q73, Q75, Q76, Q77, Q78)~1,lca.prepped,nclass=i,nrep=5)
}
saveRDS(lca.allcat, file="data_LCA_results.RDS")

# load LCA results (feel free to skip previous lines and use this file)
lca.allcat <- readRDS("data_LCA_results.RDS")

# figure out which class is which for labels 
lca.prepped$solution <- lca.allcat[[3]]$predclass
cdat$solution <- lca.allcat[[3]]$predclass
lca.prepped$type <- cdat$type
class.labeller <- lca.prepped %>% filter(type=="Politicians") %>% 
  group_by(solution) %>% summarise(n = n()) %>% mutate(percent = n / sum(n)) %>% 
  arrange(-percent) %>% mutate(class.label = c("Democratic\nRealism", "Democratic\nOptimism", "Undecided", "Inattentive")) %>%
  arrange(solution)
class.labels <- class.labeller$class.label

# plot 1: bar graph for citizens and politicians (4-class solution)
lca.props <- lca.prepped %>% group_by(type, solution) %>% 
  summarise(n = n()) %>% mutate(percent = n / sum(n))
lca.props$solution <- factor(lca.props$solution, labels=class.labels)
lca.props$solution <- factor(lca.props$solution, levels=c("Democratic\nOptimism", "Democratic\nRealism", "Undecided", "Inattentive"))
p1 <- ggplot(lca.props, aes(x=solution, y=percent, fill=type)) + 
  geom_bar(stat="identity") + 
  geom_text(aes(label = scales::percent(percent, accuracy=1L)), vjust=-0.5) + 
  facet_wrap(~type) + scale_fill_manual(values=c("#1b9e77", "#7570b3")) + 
  scale_y_continuous(expand=c(0,0), labels=scales::percent, limits=c(0,0.9)) + 
  theme_minimal() + theme(panel.border = element_rect(fill=NA, linewidth=0.5),
                          strip.text = element_text(hjust=0, vjust=0, size=10),
                          axis.text = element_text(size=10), legend.position = "none") + 
  xlab("") + ylab("") + ggtitle("A. Class Membership, Citizens and Politicians")

# plot 2: distributions with means 
lca.dist <- cdat %>% mutate(solution =  lca.allcat[[3]]$predclass) %>% 
  pivot_longer(cols = contains("Q")) %>% 
  rename(question = name) %>% 
  left_join(., questions, by="question") %>% 
  group_by(solution, question) %>% mutate(class.mean = mean(value))
lca.dist$solution <- factor(lca.dist$solution, labels=class.labels)
lca.dist$solution <- factor(lca.dist$solution, levels=c("Democratic\nOptimism", "Democratic\nRealism", "Undecided", "Inattentive"))
lca.dist$short_label_3 <- factor(lca.dist$question, labels=c("Unfair (0) vs.\nFair (10)", "Policy (0) vs.\nIdentity (10)", "Future (0) vs.\nPast (10)", "Sociotropic (0) vs.\nEgocentric (10)", "Single-Issue (0) vs.\nMany-Issue (10)", "Ideas (0) vs.\nLeaders (10)", "Knowledge (0) vs.\nIgnorance (10)", "Short-term (0) vs.\nLong-term (10)"))
p2 <- ggplot(lca.dist, aes(x=value)) + 
  geom_bar() + 
  geom_vline(xintercept=5, linetype="dotted") + 
  facet_grid(solution~short_label_3) + 
  scale_y_continuous(expand=c(0,0), limits=c(0,1500)) + 
  scale_x_continuous(breaks=c(0,5,10)) + 
  xlab("") + ylab("") + theme_minimal() + theme(panel.border = element_rect(fill=NA, linewidth = 0.5),
                                                strip.text = element_text(hjust=0, vjust=0, size=10)) +
  ggtitle("B. Distribution of Responses, by Class")
p1 / p2 + plot_layout(heights=c(1,4))
ggsave("maintext_figure3.pdf", width=12, height=10)

###############################################################################
# Table SM5 - Citizen-Politician Comparison

# multinomial logit model
mlogit.data <- lca.prepped %>% dplyr::select(solution, type)
mlogit.data$country <- cdat$Country
mlogit.data$solution <- factor(mlogit.data$solution, labels=class.labels)
mlogit.data$solution <- factor(mlogit.data$solution, levels=c("Democratic\nOptimism", "Democratic\nRealism", "Undecided", "Inattentive"))
lmod1 <- multinom(solution ~ type + factor(country), data=mlogit.data)
stargazer(lmod1, out="sm_table_SM5.tex",
          omit.stat = c("rsq","ser","f"),
          covariate.labels = c("Politician", "Belgium", "Canada", "Czechia", "Denmark", "Germany", "Israel", "Netherlands", "Portugal", "Sweden", "Switzerland"),
          column.labels = c("Democratic Realist", "Undecided", "Inattentive"),
          dep.var.caption = c(""),
          title = "Citizen-Politician Comparison (Base = Democratic Optimism)",
          label = "tab:mlogit",
          column.sep.width = "1pt",
          header=F,
          dep.var.labels.include = F)

###############################################################################
# Figure 4: Politicians' LCA Types, by Country
# Figure SM9: Politicians' LCA Types, by Country

lca.prepped$Country <- cdat$Country

# set up 
lca.bycountry <- lca.prepped %>% left_join(., c.labels) %>% 
  filter(type == "Politicians") %>% group_by(clabel, solution) %>% 
  summarise(n = n()) %>% mutate(percent = n/sum(n))
lca.bycountry$clabel[lca.bycountry$clabel=="Belgium (Flanders)"] <- "Belgium"

# plot
solution.number <- class.labeller %>% filter(class.label=="Democratic\nRealism") %>% pull(solution)
order <- lca.bycountry %>% filter(solution==solution.number) %>% arrange(-percent) %>% pull(clabel)
lca.bycountry$solution <- factor(lca.bycountry$solution, labels=class.labels)
lca.bycountry$solution <- factor(lca.bycountry$solution, levels=c("Democratic\nRealism", "Democratic\nOptimism", "Undecided", "Inattentive"))
lca.bycountry$clabel <- factor(lca.bycountry$clabel, levels=order)
ggplot(lca.bycountry %>% filter(solution=="Democratic\nRealism" | solution=="Democratic\nOptimism"), aes(x=clabel, y=percent)) + 
  geom_bar(stat="identity", fill="#7570b3") + 
  geom_text(aes(label = scales::percent(percent, accuracy=1L), y=0.04)) + 
  facet_wrap(~solution, nrow=4) +
  scale_y_continuous(expand=c(0,0), limits=c(0,1), labels=scales::percent) + 
  xlab("") + ylab("") + theme_minimal() + theme(panel.border = element_rect(fill=NA, linewidth = 0.5),
                                                strip.text = element_text(hjust=0, vjust=0, size=10)) 
ggsave("maintext_figure4.pdf", width=10, height=5)  

# for SM
ggplot(lca.bycountry, aes(x=clabel, y=percent)) + 
  geom_bar(stat="identity", fill="#7570b3") + 
  geom_text(aes(label = scales::percent(percent, accuracy=1L), y=0.07)) + 
  facet_wrap(~solution, nrow=4) +
  scale_y_continuous(expand=c(0,0), limits=c(0,1), labels=scales::percent) + 
  xlab("") + ylab("") + theme_minimal() + theme(panel.border = element_rect(fill=NA, linewidth = 0.5),
                                                strip.text = element_text(hjust=0, vjust=0, size=10)) 
ggsave("sm_figure_SM9.pdf", width=10, height=8)  

###############################################################################
# Figures SM1 & SM2: Item Correlations

# citizens
cdat.corr <- cdat %>% filter(type=="Citizens") %>% dplyr::select(contains("Q"))
colnames(cdat.corr) <- questions %>% arrange(question) %>% pull(short_label)
pairs.panels(cdat.corr)

# politicians
pdat.corr <- cdat %>% filter(type=="Politicians") %>% dplyr::select(contains("Q"))
colnames(pdat.corr) <- questions %>% arrange(question) %>% pull(short_label)
pairs.panels(pdat.corr)

###############################################################################
# Figure SM3: Local Politicians vs. National Politicians

local <- readRDS("data_localpols.RDS")
q.labels <- questions

# models: local pols
local.mods <- local %>% group_by(question) %>% 
  do(model = broom::tidy(lm(value ~ type, data=.), conf.int=T)) %>%
  unnest(model) %>% filter(term=="typePoliticians") %>% mutate(sig = ifelse(p.value<0.05,1,0)) %>% 
  left_join(., q.labels) %>%
  mutate(level = "Unmonitored") %>% dplyr::select(short_label, estimate, conf.low, conf.high, sig, level)

# models: national pols
nat.mods <- all.resp %>% filter(Country==5) %>% group_by(short_label) %>% 
  do(model = tidy(lm(value ~ type, data=.), conf.int=T)) %>% 
  unnest(model) %>% filter(term=="typePoliticians") %>% 
  mutate(sig = ifelse(p.value>0.05, 1, ifelse(estimate<0,2,3))) %>% 
  mutate(level = "Monitored") %>%
  dplyr::select(short_label, estimate, conf.low, conf.high, sig, level) 

# plot
mods.levelcompare <- rbind(local.mods, nat.mods)
ggplot(mods.levelcompare, aes(x=reorder(short_label, estimate), y=estimate, ymin=conf.low, ymax=conf.high, color=level, group=level)) + 
  geom_pointrange(position = position_dodge(width=0.5)) + coord_flip() + xlab("") + ylab("") + 
  theme_minimal() + theme(panel.border = element_rect(fill=NA, linewidth=0.5),
                          axis.text.y = element_text(size=10),
                          legend.position = "bottom", legend.title = element_blank()) + 
  scale_color_manual(values = c("gray","black")) 
ggsave("sm_figure_SM3.pdf", width=8, height=4)

###############################################################################
# Table SM4: Response Timing 

timing <- readRDS("data_timing.RDS")
m1 <- lm(q1 ~ type, data=timing)
m2 <- lm(q2 ~ type, data=timing)
stargazer(m1, m2, out="sm_tableSM4.tex",
          omit.stat = c("rsq","ser","f"),
          covariate.labels = c("Politician"),
          column.labels = c("Block 1", "Block 2"),
          dep.var.caption = c(""),
          title = "Response Timing Comparison",
          label = "tab:timing",
          column.sep.width = "1pt",
          header=F,
          dep.var.labels.include = F)

###############################################################################
# Figures SM4 & SM5: Response Extremity

# politicians vs. citizens response extremity test
scale.test <- all.resp %>% 
  mutate(choice = car::recode(value, "0=5; 1=4; 2=3; 3=2; 4=1; 5=0; 6=1; 7=2; 8=3; 9=4; 10=5"))
scale.mod <- multinom(choice ~ type, scale.test)
scale.margins <- avg_slopes(scale.mod)
scale.margins$group <- as.numeric(scale.margins$group)
p1 <- ggplot(scale.margins, aes(x=group, y=estimate, ymin=conf.low, ymax=conf.high, group=1)) + 
  geom_hline(yintercept = 0, linetype="dotted") + 
  geom_line(color="gray") + geom_pointrange() + 
  scale_y_continuous(limits=c(-0.06, 0.06)) + 
  ylab("Pr(Selection)") + xlab("Extremity of Choice") + 
  theme_minimal() + theme(legend.title = element_blank(),
                          legend.position = "none",
                          strip.text = element_text(hjust=0, vjust=0, size=10),
                          panel.border = element_rect(fill=NA, linewidth=0.5)) +
  ggtitle("Politicians (1) vs. Citizens (0)")

# use of scale range among citizens: recode data for this test
c.scale <- readRDS("data_citizen_extremity.RDS")
scale.edu <- multinom(choice ~ degree, data = c.scale)
scale.gender <- multinom(choice ~ woman, data = c.scale)
scale.age <- multinom(choice ~ factor(age), data = c.scale)
scale.margins.edu <- avg_slopes(scale.edu) %>% mutate(var = "University Degree")
scale.margins.gender <- avg_slopes(scale.gender) %>% mutate(var = "Gender (Woman)")
scale.margins.age <- avg_slopes(scale.age) %>% filter(contrast == "3 - 1") %>% mutate(var = "Age (65+ vs. 18-34)")
scale.margins.all <- rbind(scale.margins.edu, scale.margins.gender, scale.margins.age)
p2 <- ggplot(scale.margins.all, aes(x=group, y=estimate, ymin=conf.low, ymax=conf.high, group=1)) + 
  geom_hline(yintercept = 0, linetype="dotted") +
  facet_wrap(~var) + 
  scale_y_continuous(limits=c(-0.06, 0.06)) + 
  geom_line(color="gray") + geom_pointrange() + 
  ylab("Pr(Selection)") + xlab("Extremity of Choice") + 
  theme_minimal() + theme(legend.title = element_blank(),
                          legend.position = "none",
                          strip.text = element_text(hjust=0, vjust=0, size=10),
                          panel.border = element_rect(fill=NA, linewidth=0.5)) + 
  ggtitle("Demographic Groups (Citizen Responses)")
p1 / p2
ggsave("sm_figure_SM4.pdf", width=8.5, height=8)

# robustness test: citizen-politician differences in recoded data
recode.test <- all.resp %>% 
  mutate(choice = car::recode(value, "0:4=1; 5=2; 6:10=3"))
recode.split <- split(recode.test, recode.test$question)
recode.mods <- lapply(seq_along(recode.split), function(i){
  df.tmp <- recode.split[[i]]
  model <- multinom(choice ~ type + factor(Country), data=df.tmp)
  modelplot(model, coef_map = c('typePoliticians' = 'Politician'))$data
})
recode.mods <- bind_rows(recode.mods)
recode.mods$question <- rep(unique(recode.test$question), each=2)
recode.mods$value <- rep(c(0,1), times=8)
recode.mods <- left_join(recode.mods, q.labels)
recode.mods %>% filter(value==1) %>% 
  ggplot(., aes(x=reorder(short_label_2, estimate), y=estimate, ymin=conf.low, ymax=conf.high)) + 
  geom_hline(yintercept=0, linetype="dotted") + 
  geom_pointrange() + coord_flip() + xlab("") + ylab("") + 
  theme_minimal() + theme(panel.border = element_rect(fill=NA, linewidth=0.5),
                          axis.text.y = element_text(size=10),
                          legend.position = "none", legend.title = element_blank()) 
ggsave("sm_figure_SM5.pdf", width=8.5, height=4)  

###############################################################################
# Figures SM6 & SM7: Citizen Responses; Self-Description vs. Description of All Voters

cso <- readRDS("data_cso.RDS")

# model differences (politician v. citizen) and plot
cso$type <- factor(cso$type, levels=c("Politician", "Citizen (Self)", "Citizen (General)"))
diffs1 <- cso %>% group_by(name) %>% 
  do(model = broom::tidy(lm(value ~ type + factor(Country), data=.), conf.int=T)) %>%
  unnest(model) %>% filter(grepl("type", term)) %>% 
  rename(question = name) %>% left_join(., q.labels)
diffs1$term <- factor(diffs1$term, labels=c("Politician vs.Citizen\n(General Description)", "Politician vs.Citizen\n(Self-Description)"))
ggplot(diffs1, aes(x=reorder(short_label, estimate), y=estimate, ymin=conf.low, ymax=conf.high, group=term, color=term)) + 
  geom_hline(yintercept=0, linetype="dotted") + 
  geom_pointrange() + coord_flip() + xlab("") + ylab("") + 
  scale_color_manual(values=c("black", "orange")) + 
  theme_minimal() + theme(panel.border = element_rect(fill=NA, linewidth=0.5),
                          axis.text.y = element_text(size=10),
                          legend.position = "bottom", legend.title = element_blank()) 
ggsave("sm_figure_SM7.pdf", width=8.5, height=4)

# model differences (the two citizen types) and plot
cso$type <- factor(cso$type, levels=c("Citizen (General)", "Citizen (Self)", "Politician"))
diffs2 <- cso %>% group_by(name) %>% 
  do(model = broom::tidy(lm(value ~ type + factor(Country), data=.), conf.int=T)) %>%
  unnest(model) %>% filter(term=="typeCitizen (Self)") %>% 
  rename(question = name) %>% left_join(., q.labels)
ggplot(diffs2, aes(x=reorder(short_label, estimate), y=estimate, ymin=conf.low, ymax=conf.high, group=term, color=term)) + 
  geom_hline(yintercept=0, linetype="dotted") + 
  geom_pointrange() + coord_flip() + xlab("") + ylab("") + 
  scale_color_manual(values=c("black", "orange")) + 
  theme_minimal() + theme(panel.border = element_rect(fill=NA, linewidth=0.5),
                          axis.text.y = element_text(size=10),
                          legend.position = "none", legend.title = element_blank()) 
ggsave("sm_figure_SM6.pdf", width=8.5, height=4)

###############################################################################
# Figure SM8: Citizen Responses; Self-Description vs. Description of All Voters

lca.probs <- readRDS("data_lca_probs.RDS")

# probability estimates for politicians
prob.pol <- lca.probs %>% filter(type == "Politicians") %>% dplyr::select(1:4) %>%
  pivot_longer(cols = c(1:4)) %>% group_by(name) %>% 
  do(model = broom::tidy(lm(value ~ 1, data=.), conf.int=T)) %>% unnest(model) %>%
  mutate(type = "Politicians", class = c("Democratic Optimist", "Democratic Realist", "Inattentive", "Undecided"))

# probability estimates: citizen demographics 
prob.edu <- lca.probs %>% filter(!is.na(degree)) %>% dplyr::select(1:4, degree) %>% 
  pivot_longer(cols = c(1:4)) %>% group_by(name, degree) %>% 
  do(model = broom::tidy(lm(value ~ 1, data=.), conf.int=T)) %>% unnest(model) %>%
  mutate(type = rep(c("Degree (No)", "Degree (Yes)"), times=4), class = rep(c("Democratic Optimist", "Democratic Realist", "Inattentive", "Undecided"), each=2))
prob.age <- lca.probs %>% filter(!is.na(age)) %>% dplyr::select(1:4, age) %>%
  pivot_longer(cols = c(1:4)) %>% group_by(name, age) %>% 
  do(model = broom::tidy(lm(value ~ 1, data=.), conf.int=T)) %>% unnest(model) %>%
  mutate(type = rep(c("Age (18-34)", "Age (35-64)","Age (65+)"), times=4), class = rep(c("Democratic Optimist", "Democratic Realist", "Inattentive", "Undecided"), each=3))
prob.gender <- lca.probs %>% filter(!is.na(woman)) %>% dplyr::select(1:4, woman) %>%
  pivot_longer(cols = c(1:4)) %>% group_by(name, woman) %>% 
  do(model = broom::tidy(lm(value ~ 1, data=.), conf.int=T)) %>% unnest(model) %>%
  mutate(type = rep(c("Woman (No)", "Woman (Yes)"), times=4), class = rep(c("Democratic Optimist", "Democratic Realist", "Inattentive", "Undecided"), each=2))
probs <- bind_rows(prob.pol, prob.edu, prob.age, prob.gender)
probs$type <- factor(probs$type, levels=c("Woman (No)", "Woman (Yes)", "Age (18-34)", "Age (35-64)", "Age (65+)", "Degree (No)", "Degree (Yes)", "Politicians"))
probs$elite <- ifelse(probs$type=="Politicians",1,0)
ggplot(probs, aes(x=type, y=estimate, ymin = conf.low, ymax=conf.high, fill=factor(elite))) + 
  geom_bar(stat="identity") + facet_wrap(~class, nrow=4) + 
  geom_text(aes(y=0.85, label = scales::percent(estimate, accuracy=1L))) + 
  scale_y_continuous(limits=c(0,1), labels=scales::percent, expand=c(0,0)) + 
  geom_linerange(color="black") + ylab("") + xlab("") + 
  scale_fill_manual(values=c("#a6cee3", "#b2df8a")) + 
  theme_minimal() + theme(panel.border = element_rect(fill=NA, linewidth=0.5),
                          axis.text.y = element_text(size=10),
                          legend.position = "none", legend.title = element_blank()) 
ggsave("sm_figure_SM8.pdf", width=8.5, height=8)

###############################################################################
# Figure SM10: Intra-Class Correlation Coefficients

# function to calculate ICCs 
icc.function <- function(x){
  variance <- as.data.frame(VarCorr(x)) 
  variance <- variance %>% dplyr::select(vcov)
  variance
  sigma_j_sq <- variance[1,1]
  sigma_i_sq <- variance[2,1]
  sigma_j_sq / (sigma_j_sq + sigma_i_sq)
}

# calculate iccs
questions <- unique(all.resp$question)
iccs <- NULL
for(i in 1:8){
  tmp.pol <- all.resp %>% filter(type=="Politicians") %>% filter(question == questions[i])
  tmp.cit <- all.resp %>% filter(type=="Citizens") %>% filter(question == questions[i])
  tmp.all <- all.resp %>% filter(question == questions[i])
  mod1 <- lmer(value ~ (1|Country), data=tmp.pol)
  mod2 <- lmer(value ~ (1|Country), data=tmp.cit)
  mod3 <- lmer(value ~ (1|Country), data=tmp.all)
  iccs <- rbind(iccs, data.frame(question = questions[i], Politicians = icc.function(mod1), Citizens = icc.function(mod2), Pooled = icc.function(mod3)))
}
iccs <- left_join(iccs, q.labels)

# plot ICCs
iccs.long <- iccs %>% pivot_longer(cols = c(Politicians, Citizens, Pooled))
iccs.long$name <- factor(iccs.long$name, levels=c("Pooled", "Politicians", "Citizens"))
ggplot(iccs.long, aes(x=value, y=short_label)) + 
  geom_bar(stat="identity") + facet_wrap(~name) + 
  ylab("") + xlab("Intra-Class Correlation Coefficient") + 
  scale_x_continuous(limits=c(0,0.45), expand=c(0,0), breaks=c(0,0.1,0.2,0.3,0.4)) + 
  theme_minimal() + theme(panel.border = element_rect(fill=NA, size=0.5),
                          axis.text.y = element_text(size=8))
ggsave("sm_figure_SM10.pdf", width=8.5, height=3)  

###############################################################################
# Figure SM11: Latent Class Analysis Fit Statistics

# calculate fit statistics
lca.fit <- lapply(seq_along(lca.allcat), function(i){
  tibble(classes = i + 1, llik = lca.allcat[[i]]$llik,
         aic = lca.allcat[[i]]$aic, bic = lca.allcat[[i]]$bic)
})
lca.fit <- bind_rows(lca.fit) %>% pivot_longer(cols = c(2:4))
lca.fit$name <- factor(lca.fit$name, labels=c("AIC", "BIC", "Log Likelihood"))

# plot results
ggplot(lca.fit, aes(x=classes, y=value)) + 
  xlab("Number of Latent Classes") + ylab("") +
  geom_point() + geom_line() + facet_wrap(~name, scales="free", nrow=3) + 
  scale_x_continuous(breaks=c(2:20)) + 
  theme_minimal() + theme(panel.border = element_rect(fill=NA, size=0.5),
                          strip.text = element_text(hjust=0, vjust=0))
ggsave("sm_figure_SM11.pdf", width=8.5, height=6)

###############################################################################
# Figure SM12: Eight-Class LCA Solution

lca.prepped$solution <- lca.allcat[[7]]$predclass
lca.prepped$type <- cdat$type
lca.props <- lca.prepped %>% group_by(type, solution) %>% 
  summarise(n = n()) %>% mutate(percent = n / sum(n))
p1 <- ggplot(lca.props, aes(x=solution, y=percent)) + 
  geom_bar(stat="identity") + 
  geom_text(aes(label = scales::percent(percent, accuracy=0.1)), vjust=-0.5) + 
  facet_wrap(~type) + 
  scale_x_continuous(breaks=c(1:8)) + 
  scale_y_continuous(expand=c(0,0), labels=scales::percent, limits=c(0,0.9)) + 
  theme_minimal() + theme(panel.border = element_rect(fill=NA, size=0.5),
                          strip.text = element_text(hjust=0, vjust=0, size=8)) + 
  xlab("") + ylab("") + ggtitle("Class Membership, Citizens and Politicians")

# plot 2: distributions with means 
lca.dist <- cdat %>% mutate(solution =  lca.allcat[[7]]$predclass) %>% 
  pivot_longer(cols = contains("Q")) %>% 
  rename(question = name) %>% 
  left_join(., q.labels, by="question") %>% 
  group_by(solution, question) %>% mutate(class.mean = mean(value))
lca.dist$short_label_3 <- factor(lca.dist$question, labels=c("Unfair (0) vs.\nFair (10)", "Policy (0) vs.\nIdentity (10)", "Future (0) vs.\nPast (10)", "Sociotropic (0) vs.\nEgocentric (10)", "Single-Issue (0) vs.\nMany-Issue (10)", "Policy Ideas (0) vs.\nLeaders (10)", "Knowledge (0) vs.\nIgnorance (10)", "Short-term (0) vs.\nLong-term (10)"))
p2 <- ggplot(lca.dist, aes(x=value)) + 
  geom_bar() + 
  geom_vline(xintercept=5, linetype="dotted") + 
  facet_grid(solution~short_label_3) + 
  scale_y_continuous(expand=c(0,0)) + 
  scale_x_continuous(breaks=c(0,5,10)) + 
  xlab("") + ylab("") + theme_minimal() + theme(panel.border = element_rect(fill=NA, linewidth = 0.5),
                                                strip.text = element_text(hjust=0, vjust=0, size=8)) +
  ggtitle("Distribution of Responses, by Class")
p1 / p2 + plot_layout(heights=c(1,4))
ggsave("sm_figure_SM12.pdf", width=8.5, height=11)

###############################################################################
# Figure SM13: Summary of LCA with Recoded Theory Items

lca.prepped.five <- cdat %>% 
  filter(Country!=14) %>% filter(Country!=8) %>%
  dplyr::select(Q70, Q71, Q72, Q73, Q75, Q76, Q77, Q78) %>%
  mutate(across(everything(),function(x)c(car::recode(x, "0:2=1; 3:4=2; 5=3; 6:7=4; 8:10=5"))))
lca.five <- poLCA::poLCA(cbind(Q70, Q71, Q72, Q73, Q75, Q76, Q77, Q78)~1,lca.prepped.five,nclass=4,nrep=5)

# plot results 
lca.prepped.five$solution <- lca.five$predclass
lca.prepped.five$type <- cdat$type
lca.props.five <- lca.prepped.five %>% group_by(type, solution) %>% 
  summarise(n = n()) %>% mutate(percent = n / sum(n))
p1 <- ggplot(lca.props.five, aes(x=solution, y=percent)) + 
  geom_bar(stat="identity") + 
  geom_text(aes(label = scales::percent(percent, accuracy=1L)), vjust=-0.5) + 
  facet_wrap(~type) + 
  scale_y_continuous(expand=c(0,0), labels=scales::percent, limits=c(0,0.9)) + 
  theme_minimal() + theme(panel.border = element_rect(fill=NA, size=0.5),
                          strip.text = element_text(hjust=0, vjust=0, size=10),
                          axis.text = element_text(size=10)) + 
  xlab("") + ylab("") + ggtitle("A. Class Membership, Citizens and Politicians")
cdat$solution <- lca.five$predclass
fiveclass.long <- cdat %>% pivot_longer(contains("Q"), names_to = "question")
fiveclass.long$short_label_3 <- factor(fiveclass.long$question, labels=c("Unfair (0) vs.\nFair (10)", "Policy (0) vs.\nIdentity (10)", "Future (0) vs.\nPast (10)", "Sociotropic (0) vs.\nEgocentric (10)", "Single-Issue (0) vs.\nMany-Issue (10)", "Ideas (0) vs.\nLeaders (10)", "Knowledge (0) vs.\nIgnorance (10)", "Short-term (0) vs.\nLong-term (10)"))
p2 <- ggplot(fiveclass.long, aes(x=value)) + 
  geom_bar() + 
  geom_vline(xintercept=5, linetype="dotted") + 
  facet_grid(solution~short_label_3) + 
  scale_y_continuous(expand=c(0,0), limits=c(0,1500)) + 
  scale_x_continuous(breaks=c(0,5,10)) + 
  xlab("") + ylab("") + theme_minimal() + theme(panel.border = element_rect(fill=NA, linewidth = 0.5),
                                                strip.text = element_text(hjust=0, vjust=0, size=10)) +
  ggtitle("B. Distribution of Responses, by Class")
p1 / p2 + plot_layout(heights=c(1,4))
ggsave("sm_figure_SM13.pdf", width=12, height=10)

###############################################################################
# Figure SM14: Alternative Model: Hierarchical Cluster Analysis

cdat <- readRDS("data_LCA.RDS")
cdat.prepped <- cdat %>% dplyr::select(Q70, Q71, Q72, Q73, Q75, Q76, Q77, Q78)
hc <- hclust(dist(cdat.prepped), method="complete")

# create data with four-class solution 
cdat$class <- cutree(hc, 4)

# plot 1: bar graph for citizens and politicians 
cluster.props <- cdat %>% group_by(type, class) %>% 
  summarise(n = n()) %>% mutate(percent = n / sum(n))
p1 <- ggplot(cluster.props, aes(x=class, y=percent)) + 
  geom_bar(stat="identity") + 
  geom_text(aes(label = scales::percent(percent, accuracy=0.1)), vjust=-0.5) + 
  facet_wrap(~type) + 
  scale_y_continuous(expand=c(0,0), labels=scales::percent, limits=c(0,0.9)) + 
  scale_x_continuous(breaks=c(1:5)) + 
  theme_minimal() + theme(panel.border = element_rect(fill=NA, size=0.5)) + 
  xlab("") + ylab("") + ggtitle("Class Membership, Citizens and Politicians")

# plot 2: distributions with means 
cluster.dist <- cdat %>% pivot_longer(cols = contains("Q")) %>%
  rename(question = name) %>% 
  left_join(., q.labels, by="question")
p2 <- ggplot(cluster.dist, aes(x=factor(value))) + geom_bar() + 
  geom_vline(xintercept=6, linetype="dotted") + 
  #  geom_text(data=means, aes(x=10, y=2250, label=mean)) + 
  facet_grid(class~short_label) + 
  scale_y_continuous(expand=c(0,0)) + 
  xlab("") + ylab("") + theme_minimal() + theme(panel.border = element_rect(fill=NA, linewidth = 0.5),
                                                strip.text = element_text(hjust=0, vjust=0, size=8)) 
p1 / p2 + plot_layout(heights=c(1,4))
ggsave("sm_figure_SM14.pdf", width=10, height=8)

means <- cluster.dist %>% group_by(type, class, short_label) %>% 
  summarise(mean = mean(value))

###############################################################################
# Figure SM15: Predicted Probabilities of Latent Class Membership, LCR Model

# latent class regression model
lca.prepped$type <- as.numeric(car::recode(cdat$type, "'Politicians'=1;'Citizens'=0"))
lca.prepped <- lca.prepped %>% dplyr::select(contains("Q"), type)
set.seed(12345)
lcr <- poLCA::poLCA(cbind(Q70, Q71, Q72, Q73, Q75, Q76, Q77, Q78)~type,lca.prepped,nclass=4,nrep=5)

# calculate and plot predicted probabilities
sim <- data.frame(Intercept = 1, type = c(0,1))
X <- model.matrix(~type, data=sim)
coef <- lcr$coeff
Xb1 <- t(X %*% coef[,1])
Xb2 <- t(X %*% coef[,2])
Xb3 <- t(X %*% coef[,3])
p1 <- 1 / (1 + exp(Xb1) + exp(Xb2) + exp(Xb3))
p2 <- exp(Xb1) / (1 + exp(Xb1) + exp(Xb2) + exp(Xb3))
p3 <- exp(Xb2) / (1 + exp(Xb1) + exp(Xb2) + exp(Xb3))
p4 <- exp(Xb3) / (1 + exp(Xb1) + exp(Xb2) + exp(Xb3))
probs <- data.frame(rbind(p1, p2, p3, p4)) %>% pivot_longer(cols = c(1:2)) %>%
  mutate(class = c(1,1,2,2,3,3,4,4))
probs$name <- factor(probs$name, labels=c("Citizens", "Politicians"))
ggplot(probs, aes(x=class, y=value, group=name, fill=name)) + 
  geom_bar(stat="identity", position=position_dodge()) + 
  scale_fill_manual(values=c("#1b9e77", "#7570b3")) + 
  scale_y_continuous(expand=c(0,0),limits=c(0,1)) + ylab("Probability of Latent Class Membership") + 
  scale_x_continuous(breaks=c(1:4), labels=c("Inattentive","Undecided","Democratic\nRealism","Democratic\nOptimism")) + xlab("\nLatent Class") + 
  theme_minimal() + theme(panel.border = element_rect(fill=NA, size=0.5),
                          strip.text = element_text(hjust=0, vjust=0),
                          legend.position = "bottom", 
                          legend.title = element_blank())
ggsave("sm_figure_SM15.pdf", width=8.5, height=5)

